library(kableExtra)
library(tidyverse)

## Employment 

# Municipality-level employment data, restricted to Bavaria and NRW (matched
# on the German state names) and to the identifier, date and the two
# employment outcomes summarised further down.
# The column selection happens before ungroup(), as in the stored pipeline.
ba <- 'data/BA_employment_muni.rds' %>%
  readRDS() %>%
  filter(state %in% c('Bayern', 'Nordrhein-Westfalen')) %>%
  dplyr::select(one_of(c('ags', 'date', 'emp_nat_pc', 'emp_ref_pc'))) %>%
  ungroup()

## Define state_id_to_names: map two-digit state IDs to state names

# Translate two-digit state ID codes into state names.
#
# state_id: vector of state ID codes such as "05" or "09".
# Returns: the result of recoding `state_id` with the lookup table below
#   (e.g. "05" -> "North Rhine-Westphalia", "09" -> "Bavaria").
state_id_to_names <- function(state_id) {
  # Lookup table: names are the ID codes, values are the state names.
  state_names <- c(
    `01` = "Schleswig-Holstein",
    `02` = "Hamburg",
    `03` = "Niedersachsen",
    `04` = "Bremen",
    `05` = "North Rhine-Westphalia",
    `06` = "Hesse",
    `07` = "Rhineland-Palatinate",
    `08` = "Baden-Württemberg",
    `09` = "Bavaria",
    `10` = "Saarland",
    `11` = "Berlin",
    `12` = "Brandenburg",
    `13` = "Mecklenburg-Vorpommern",
    `14` = "Saxony",
    `15` = "Saxony-Anhalt",
    `16` = "Thuringia"
  )

  # Splice the table into recode() as old = new pairs.
  recode(state_id, !!!state_names)
}

## Define summarize_data: summary statistics for a single variable

# Compute summary statistics for a single variable, ignoring missing values.
#
# vector: vector of values to summarise (may contain NA).
# Returns: a one-row data.frame with columns
#   mean_v (mean), sd_v (standard deviation), n_obs (number of non-missing
#   values), min_v (minimum) and max_v (maximum).
#   If there are no non-missing values, n_obs is 0 and all statistics are NA
#   (instead of NaN / Inf / -Inf plus warnings from min() and max()).
summarize_data <- function(vector) {
  # Drop missing values once; equivalent to na.rm = TRUE in every statistic.
  observed <- vector[!is.na(vector)]
  n_obs <- length(observed)

  if (n_obs == 0L) {
    # Nothing to summarise: report missing statistics rather than Inf/-Inf.
    mean_v <- NA_real_
    sd_v <- NA_real_
    min_v <- NA_real_
    max_v <- NA_real_
  } else {
    mean_v <- mean(observed)
    sd_v <- sd(observed)
    min_v <- min(observed)
    max_v <- max(observed)
  }

  # cbind() keeps the original one-row, five-column layout and column names.
  data.frame(cbind(mean_v, sd_v, n_obs, min_v, max_v))
}

## BA Data 

# Summary statistics for the two employment outcomes: one row per variable
# from summarize_data(), followed by the descriptive columns used in the
# combined table (label, level of analysis, sample, period, dataset).
ba_summary <- ba[, c('emp_nat_pc', 'emp_ref_pc')] %>%
  apply(2, summarize_data) %>%
  reduce(rbind) %>%
  mutate(varname = c('Native Employment (scaled by working age pop.)',
                     'Refugee Employment (scaled by working age pop. in 1,000s)'),
         level = 'Municipality',
         subset = 'Bavaria + NRW',
         # ' -- ' (LaTeX en dash) to match the year ranges of the other
         # datasets in the table; this one previously used a plain hyphen.
         years = '2008 -- 2018',
         dataset = 'Employment Data')

## Wages

# Wage data: derive the state name from the first two characters of
# county_id, keep Bavaria and NRW, and drop two wage series that are not
# included in the summary below.
wages <- 'data/wages_clean.rds' %>%
  readRDS() %>%
  mutate(state = state_id_to_names(substr(county_id, 1, 2))) %>%
  filter(state %in% c('North Rhine-Westphalia', 'Bavaria')) %>%
  dplyr::select(-one_of('wage_all_wohnort', 'wage_higheduc'))

# Summarise every remaining column whose name contains 'wage'.
# NOTE(review): the labels below are attached by position, so they rely on
# the wage columns appearing in exactly this order in the file -- confirm.
wages_summary <- wages[, str_subset(names(wages), 'wage')] %>%
  apply(2, summarize_data) %>%
  reduce(rbind) %>%
  mutate(
    varname = c(
      'Monthly gross wage: all employees',
      'Monthly gross wage: men',
      'Monthly gross wage: women',
      'Monthly gross wage: german natives',
      'Monthly gross wage: foreigners',
      'Monthly gross wage: 15 - 25 years old',
      'Monthly gross wage: 25 - 55 years old',
      'Monthly gross wage: 55 - 65 years old',
      'Monthly gross wage: low education',
      'Monthly gross wage: vocational training'
    ),
    level = 'County',
    subset = 'Bavaria + NRW',
    years = '2014 -- 2018',
    dataset = 'Wages'
  )

## Federal Elections 

# Federal election results, ungrouped and restricted to Bavaria and NRW
# (matched on the German state names).
bt <- 'data/federal_elections_clean.RDS' %>%
  readRDS() %>%
  ungroup() %>%
  filter(state %in% c('Bayern', 'Nordrhein-Westfalen'))

# Convert the AfD column to numeric before summarising.
# NOTE(review): presumably it is stored as character in the file -- confirm.
bt[['afd_party']] <- as.numeric(bt[['afd_party']])

# One row of summary statistics per outcome; the labels in `varname` follow
# the order of the selected columns.
bt_summary <- bt[, c('afd_party',
                     'cdu_csu_party',
                     'fdp_party',
                     'greens_party',
                     'left_party',
                     'n_votes_party',
                     'spd_party',
                     'right_total_party',
                     'left_total_party',
                     'turnout_party')] %>%
  apply(2, summarize_data) %>%
  reduce(rbind) %>%
  mutate(
    varname = c(
      'AfD vote share',
      'CDU/CSU vote share',
      'FDP vote share',
      'Greens vote share',
      'Die Linke vote share',
      'Number of Votes Cast',
      'SPD vote share',
      'Total right vote share',
      'Total left vote share',
      'Turnout in %'
    ),
    level = 'Municipality',
    subset = 'Bavaria + NRW',
    years = '2013 -- 2017',
    dataset = 'Federal Elections'
  )

## Bavarian State Elections 

## State elections

# Bayern:
# totright2_nofdp = CDU/CSU + AfD 
# totleft2 = SPD and Greens (note that Left is part of other for BY)

# Bavarian state election results for the 2013 and 2018 elections.
# turnout is divided by 100 here so that the percent scaling below applies
# uniformly to it and to the vote-share variables.
bay <- readRDS('data/bavaria_elections_clean.rds') %>%
  filter(year %in% c(2013, 2018)) %>%
  dplyr::select(-year) %>%
  mutate(turnout = turnout / 100)

# Outcomes to summarise; the labels in bay_summary follow this order.
outcomevars <- c("total_votes_2", "CSU_2", "SPD_2",
                 "Greens_2", "FDP_2",
                 "AFD_2",
                 "totright2_nofdp",
                 "totleft2", "turnout")

# Express the outcomes in percent. total_votes_2 is labelled as the number of
# votes cast (a count), so it is excluded: multiplying it by 100 inflated the
# count by a factor of 100.
bay <- bay %>%
  mutate_at(setdiff(outcomevars, "total_votes_2"), ~ . * 100)

# One row of summary statistics per outcome plus the descriptive columns used
# in the combined table.
bay_summary <- apply(bay[, outcomevars], 2, summarize_data) %>%
  reduce(rbind) %>%
  mutate(varname = c('Number of Votes Cast',
                     'CSU vote share',
                     'SPD vote share',
                     'Greens vote share',
                     'FDP vote share',
                     'AfD vote share',
                     'Total right vote share',
                     'Total left vote share',
                     'Turnout in %'),
         level = 'Municipality',
         subset = 'Bavaria',
         years = '2013 -- 2018',
         dataset = 'Bavarian State Elections')


## NRW Elections 

# NRW:
# left_total = SPD + Greens + Left
# totright2_nofdp = CDU/CSU + AfD 

# State election results for NRW (land code '05'): keep the first 15 columns,
# derive the election year from `date`, and drop the 2010 election.
# `post` flags the 2017 election. The total right vote share is CDU/CSU plus
# AfD, with AfD added only for 2017.
# NOTE(review): presumably AfD has no result before 2017 -- confirm.
nrw <- 'data/state_elections_clean.RDS' %>%
  readRDS() %>%
  filter(land == '05') %>%
  dplyr::select(1:15) %>%
  mutate(year = lubridate::year(date)) %>%
  filter(year != 2010) %>%
  mutate(
    post = ifelse(year == 2017, 1, 0),
    totright2_nofdp = cdu_csu + ifelse(year == 2017, afd, 0)
  )

# Outcomes to summarise; the labels in nrw_summary follow this order.
outcomevars <- c(
  'cdu_csu',
  'spd',
  'fdp',
  'greens',
  'afd',
  'left',
  'totright2_nofdp',
  'left_total',
  'turnout'
)

# One row of summary statistics per outcome plus the descriptive columns used
# in the combined table.
nrw_summary <- nrw[, outcomevars] %>%
  apply(2, summarize_data) %>%
  reduce(rbind) %>%
  mutate(
    varname = c(
      'CDU vote share',
      'SPD vote share',
      'FDP vote share',
      'Greens vote share',
      'AfD vote share',
      'Die Linke vote share',
      'Total right vote share',
      'Total left vote share',
      'Turnout in %'
    ),
    level = 'Municipality',
    subset = 'NRW',
    years = '2012 -- 2017',
    dataset = 'NRW State Elections'
  )


### Combine all summary stats

# Stack the per-dataset summaries. Columns 1-5 are the statistics produced by
# summarize_data() (mean, sd, n, min, max) and are rounded to two decimals;
# the descriptive columns are then moved to the front and the rows for the
# number of votes cast are dropped.
summary_all <- rbind(ba_summary,
                     wages_summary,
                     bt_summary,
                     bay_summary,
                     nrw_summary) %>%
  mutate_at(vars(1:5), round, digits = 2) %>%
  dplyr::select(varname, dataset, level, years, subset, everything()) %>%
  filter(varname != 'Number of Votes Cast')

summary_all

## To Table

# Column headers for the table, in the column order set above.
summary_all <- setNames(
  summary_all,
  c('Variable', 'Dataset', 'Level of Analysis', 'Time-period', 'Subset',
    'Mean', 'S.D.', 'N', 'Min', 'Max')
)

# Render as a booktabs LaTeX table, scaled to fit and set in landscape.
kableExtra::kable(summary_all,
                  format = 'latex',
                  booktabs = TRUE,
                  caption = 'Summary Statistics') %>%
  kable_styling(latex_options = c("hold_position", 'scale_down'),
                position = "center") %>%
  landscape()





